# Libraries ----
# NOTE(review): library(gt) was attached twice in the original; duplicate removed.
library(tidyverse)
library(gmRi)
library(scales)
library(gt)
library(patchwork)
library(gtsummary)
library(sizeSpectra)
library(gganimate)
library(glue)
library(merTools)
library(nlme)
library(ggeffects)
library(emmeans)
library(tidyquant)
library(performance)

# Resolve masking conflicts in favor of dplyr
conflicted::conflict_prefer("select", "dplyr")
conflicted::conflict_prefer("filter", "dplyr")

# Processing functions
source(here::here("Code/R/Processing_Functions.R"))

# ggplot theme (gmRi house style, applied document-wide)
theme_set(
  theme_gmri(
    axis.line.y = element_line(),
    axis.ticks.y = element_line(),
    rect = element_rect(fill = "white", color = NA),
    panel.grid.major.y = element_blank(),
    strip.text.y = element_text(angle = 0),
    axis.text.x = element_text(size = 8),
    axis.text.y = element_text(size = 8),
    legend.position = "bottom"))

# Labels for factor levels
area_levels <- c("GoM", "GB", "SNE", "MAB")
area_levels_long <- c("Gulf of Maine", "Georges Bank", "Southern New England", "Mid-Atlantic Bight")
area_levels_all <- c("Northeast Shelf", area_levels)
area_levels_long_all <- c("Northeast Shelf", area_levels_long)

# Table to join for swapping shorthand for long-hand names
area_df <- data.frame(
  area = c("Scotian Shelf", "Gulf of Maine", "Georges Bank",
           "Southern New England", "Mid-Atlantic Bight", "Northeast Shelf"),
  survey_area = c("SS", "GoM", "GB", "SNE", "MAB", "Northeast Shelf"),
  area_titles = c("Scotian Shelf", "Gulf of Maine", "Georges Bank",
                  "Southern New England", "Mid-Atlantic Bight", "Northeast Shelf"))

# Degree symbol
deg_c <- "\u00b0C"
Placement for Spectra Minimum Size
Empirical size spectra studies are typically interested in the descending slope of the size distribution over a range of body sizes. Depending on gear type and species involved, this range may differ from simply using all available data. Selectivity and catchability differences between gear types and across species commonly create circumstances where the individual size distribution of the data as-is does not follow the long-tailed, right-skewed shape that spectra are known for.
Rather than simply forcing a slope estimate from this data, abundance information may need to be corrected/adjusted or truncated. This is done to ensure that the individual size distribution and the parameters we report from them are representative of one another.
This doc covers how the minimum weight parameter for the bounded pareto distribution (used by the Edwards et al. MLE methodology) has been determined and any consequences of shifting the minima of the bounded distribution from an across-the-board minima of 1g to a value determined for each group.
Code
# Data used for Wigley estimates: survey trawl records for species with
# length-weight coefficients, joined to the area lookup table so both
# shorthand and long-form area names are available downstream.
wigley_in <- left_join(
  read_csv(here::here("Data/processed/wigley_species_trawl_data.csv")),
  area_df)
The Approach
It is generally understood that trawl gear are suitable for sampling a finite range of body sizes reliably. There are body-sizes at which individuals may simply pass through the mesh, there is some range of body sizes that are believed to be well sampled, and there is evidence that above certain sizes individuals (dependent on species) may begin to avoid the gear.
The general idea behind this approach is to bin the individual abundances by log2 weight-bins, then determine the bin containing peak abundance density.
The following functions will aid in the labeling of log2 bins and determining abundance density:
Code
# Log2 body-mass bin utilities ----

#' @title Build Log2 Bin Structure Dataframe
#'
#' @description Builds a dataframe of equally spaced log2 body-mass bins for
#' size spectra analysis: left/right limits (log2 scale), bin width and
#' midpoint (grams), and a text label. The `log2_bins` id ascends with
#' increasing size for easy plotting.
#'
#' @param log2_min Smallest left bin limit on the log2 scale. Default 0 (1 g).
#' @param log2_max Largest right bin limit on the log2 scale. Default 13 (8192 g).
#' @param log2_increment Bin width on the log2 scale. Default 1.
#'
#' @return data.frame ordered by ascending left_lim with columns:
#'   log2_bins (character id, "1" = smallest bin), left_lim, right_lim
#'   (log2 scale), bin_label, bin_width, bin_midpoint (gram scale).
#' @export
#'
#' @examples
#' define_log2_bins(0, 3, 1)
define_log2_bins <- function(log2_min = 0, log2_max = 13, log2_increment = 1){

  # Right-hand limits (descending), then the matching left-hand limits
  right_lim <- seq(log2_max, log2_min + log2_increment, by = -log2_increment)
  n_bins <- length(right_lim)

  log2_bin_structure <- data.frame(
    "log2_bins" = as.character(seq(n_bins, 1, by = -1)),
    "left_lim"  = right_lim - log2_increment,
    "right_lim" = right_lim)

  # Gram-scale label, width, and midpoint for each bin
  log2_bin_structure$bin_label <- paste0(
    round(2^log2_bin_structure$left_lim, 3), " - ",
    round(2^log2_bin_structure$right_lim, 3), "g")
  log2_bin_structure$bin_width <-
    2^log2_bin_structure$right_lim - 2^log2_bin_structure$left_lim
  log2_bin_structure$bin_midpoint <-
    (2^log2_bin_structure$right_lim + 2^log2_bin_structure$left_lim) / 2

  # Order smallest -> largest so bin id "1" is the smallest size class
  log2_bin_structure <- log2_bin_structure[order(log2_bin_structure$left_lim), , drop = FALSE]
  rownames(log2_bin_structure) <- NULL
  return(log2_bin_structure)
}


#' @title Assign Manual log2 Bodymass Bins - By Weight
#'
#' @description Assigns log2 body-mass bins (increments of `log2_increment`
#' on the log2 scale) based on individual length-weight body mass
#' (`ind_weight_g`). The maximum observed weight sets the upper bin; the
#' lower limit is fixed at 2^0 = 1 g, so lighter individuals are dropped.
#'
#' Bins are left-closed / right-open ([left, right)), except the topmost bin,
#' which also includes its right limit so the heaviest individual is kept.
#' FIX(review): the previous between()-based loop used intervals closed on
#' both ends, so a weight landing exactly on an interior bin boundary was
#' assigned to two bins and its row duplicated.
#'
#' @param wmin_grams Catch data prepared for mle calculation, use prep_wmin_wmax.
#'   Must contain wmin_g, wmax_g, and ind_weight_g columns.
#' @param log2_increment Equally spaced increment for the log2 bins. Default 1.
#'
#' @return The input rows with valid weights, plus the log2 bin columns joined on.
#' @export
assign_log2_bins <- function(wmin_grams, log2_increment = 1){

  #### 1. Set up bodymass bins

  # Filter missing / non-positive weight limits
  size_data <- wmin_grams %>%
    filter(
      wmin_g > 0, !is.na(wmin_g),
      wmax_g > 0, !is.na(wmax_g))

  # Get bodymass on log2() scale
  size_data$log2_weight <- log2(size_data$ind_weight_g)

  # Set up the bins - fixed floor at 2^0 = 1 g, ceiling from the data
  # min_bin <- floor(min(size_data$log2_weight))
  min_bin <- 0
  max_bin <- ceiling(max(size_data$log2_weight))

  # Bin key used for assignment and joined back on below
  log2_bin_structure <- define_log2_bins(
    log2_min = min_bin,
    log2_max = max_bin,
    log2_increment = log2_increment)

  # Locate each individual's bin with half-open breaks; rightmost.closed
  # keeps the single heaviest individual in the top bin.
  breaks <- c(log2_bin_structure$left_lim, max(log2_bin_structure$right_lim))
  bin_idx <- findInterval(size_data$log2_weight, breaks, rightmost.closed = TRUE)

  # Drop anything below the smallest bin, above the largest bin, or with a
  # missing log2 weight (matches the old drop_na() behavior).
  keep <- !is.na(bin_idx) & bin_idx >= 1 & bin_idx <= nrow(log2_bin_structure)
  log2_assigned <- size_data[keep, , drop = FALSE]
  log2_assigned$log2_bins <- log2_bin_structure$log2_bins[bin_idx[keep]]

  # Join in the size bin details (rows now keep input order rather than
  # being regrouped by bin; downstream aggregation is order-independent)
  log2_assigned <- left_join(log2_assigned, log2_bin_structure, by = "log2_bins")

  return(log2_assigned)
}


#' @title Calculate Normalized and De-Normalized Abundances
#'
#' @description For binned size spectra estimation we use the stratified
#' abundance divided by the bin widths (normalized size spectra). The
#' de-normalized form multiplies those densities back up by the bin midpoints.
#'
#' min/max & bin_increment enforce the presence of every size bin even when a
#' bin has no abundance, so different groups/areas that should conceivably
#' sample the same size range can be compared directly.
#'
#' @param log2_assigned size data containing the bin assignments to use
#' @param min_log2_bin Minimum 2^x value for the size spectra being measured (>=)
#' @param max_log2_bin Maximum 2^x value for the size spectra being measured (<)
#' @param bin_increment The bin-width on log scale that separates each bin
#' @param ... Additional grouping factors with which to aggregate on besides
#'   the size bins themselves
#'
#' @return data.frame with one row per bin (x group): abundance, weight_g,
#'   normalized_abund/biom and denorm_abund/biom.
#' @export
aggregate_log2_bins <- function(log2_assigned,
                                min_log2_bin = 0,
                                max_log2_bin = 13,
                                bin_increment = 1,
                                ...){

  # Full possible bin structure; fills in any gaps
  log2_bin_structure <- define_log2_bins(
    log2_min = min_log2_bin,
    log2_max = max_log2_bin,
    log2_increment = bin_increment)

  # Capture all the group levels so every group x bin combination exists.
  # (...length() is the supported way to test for empty dots.)
  if (...length() > 0) {
    log2_bin_structure <- log2_bin_structure %>%
      tidyr::expand(left_lim, distinct(log2_assigned, ...)) %>%
      full_join(log2_bin_structure)
  }

  # Totals for bodymass and abundance per bin (and optional extra groups)
  log2_aggregates <- log2_assigned %>%
    group_by(left_lim, ...) %>%
    summarise(
      abundance = sum(numlen_adj, na.rm = TRUE),
      weight_g = sum(wmin_g, na.rm = TRUE),
      .groups = "drop")

  # Join limits/labels back on; the defined bins enforce the size limits
  log2_prepped <- left_join(x = log2_bin_structure, y = log2_aggregates)

  # Fill empty bins with 1 (not 0) so every intended bin is represented and
  # the later log2-scale transforms stay finite.
  log2_prepped <- log2_prepped %>%
    mutate(
      abundance = ifelse(is.na(abundance), 1, abundance),
      weight_g = ifelse(is.na(weight_g), 1, weight_g))

  # Normalize by bin width, then de-normalize with the bin midpoint
  log2_prepped <- log2_prepped %>%
    mutate(
      normalized_abund = abundance / bin_width,
      normalized_biom = weight_g / bin_width,
      denorm_abund = normalized_abund * bin_midpoint,
      denorm_biom = normalized_biom * bin_midpoint)

  # Return the aggregations
  return(log2_prepped)
}
Using those functions we can assign the bins based on the body-size column in the dataframe we would feed to our spectra slope workflow.
The following few plots will use data from Georges Bank, for its Fall 1996 size distribution.
In this plot peak normalized abundance (abundance density) is at the 32-64g bin.
This example also highlights that these empirical distributions may be multi-modal which is instructive as a common edge-case.
In this group we also see another common situation, where normalized abundance (abundance density) approaches 0, which appears strange when plotted this way on log-log axes. I’m still unsure how much that matters, or if it is only visually unappealing.
Code
# Example group for the walk-through: Georges Bank, Fall 1996 — chosen
# deliberately as a "bad" (multi-modal) case. Alternate group kept for reference:
# glabs <- list("yr" = 2000, "area" = "MAB", "season" = "Spring")

# Group details - this one is bad
glabs <- list("yr" = 1996, "area" = "GB", "season" = "Fall")

# Filter that out
# NOTE(review): `wig_l2` is created in a chunk not shown here — presumably the
# Wigley trawl data with log2 bins already assigned; confirm upstream.
test_case <- wig_l2 %>%
  filter(
    est_year == glabs$yr,
    survey_area == glabs$area,
    season == glabs$season)

# Plot the thing - cheater way: histogram with counts weighted by
# numlen_adj / bin_width so bar heights are abundance density; the tallest
# (peak-density) bar is outlined via the after_stat(count) comparison.
ggplot(test_case) +
  geom_histogram(
    aes(x = wmin_g,
        weight = (numlen_adj/bin_width),
        color = after_stat(count) == max(after_stat(count))),
    fill = "gray70",
    breaks = 2^c(0:15),
    linewidth = 1,
    alpha = 0.7) +
  scale_color_manual(values = c("transparent", "black")) +
  scale_y_continuous(
    trans = transform_log(base = 2),
    labels = label_log(base = 2),
    breaks = 2^seq(-12, 10, 2)) +
  scale_x_continuous(
    trans = transform_log(base = 2),
    labels = label_log(base = 2),
    breaks = 2^c(0:15)) +
  labs(
    title = "Group Details:",
    subtitle = str_c(
      "Area: ", glabs$area,
      "\nSeason: ", glabs$season,
      "\nYear: ", glabs$yr),
    y = "Abundance Density",
    x = "Bodyweight (g)",
    color = "Peak Number Density")
Depending on how we approach setting the limits, this group’s size distribution could yield very different spectra slope information.
Code
# Get aggregates for the example group and tag its group identifiers back on
ex_aggs <- aggregate_log2_bins(test_case, bin_increment = 1) %>%
  mutate(
    area = glabs$area,
    est_year = glabs$yr,
    season = glabs$season)

# Locate tallest bin (peak normalized abundance) by its left limit
ex_tallest <- ex_aggs %>%
  arrange(desc(normalized_abund)) %>%
  slice(1) %>%
  pull(left_lim)

# Flag the peak bin and the bins at/after it (the descending limb)
ex_aggs <- ex_aggs %>%
  mutate(
    is_tallest = if_else(left_lim == ex_tallest, T, F),
    full_set = TRUE,
    after_tallest = if_else(left_lim >= ex_tallest, T, F))

# Panel 1: OLS slope through ALL bins (no minimum-size cutoff)
plot1 <- ggplot(ex_aggs, aes(x = left_lim, y = normalized_abund)) +
  geom_col(
    color = gmri_cols("gmri blue"),
    fill = gmri_cols("gmri blue"),
    alpha = 0.6,
    linewidth = 1) +
  geom_smooth(method = "lm", formula = y~x, color = gmri_cols("gmri blue"), se = F) +
  ggpmisc::stat_poly_eq(
    method = "lm",
    ggpmisc::use_label("eq"),
    formula = y ~ x,
    geom = "label", label.x = 2, label.y = -4) +
  scale_y_continuous(
    trans = transform_log(base = 2),
    labels = label_log(base = 2),
    breaks = 2^seq(-10, 16, 2)) +
  scale_x_continuous(
    labels = label_math(expr = 2^.x),
    breaks = c(0:15)) +
  scale_fill_gmri() +
  scale_color_manual(values = c("transparent", gmri_cols("lv orange"))) +
  facet_grid(area ~ season*est_year, scale = "free") +
  labs(y = "Number Density", x = "Body Weight")

# Panel 2: OLS slope using only bins at/after the abundance peak; the
# excluded small-size bins are shown faded for context
plot2 <- ggplot(filter(ex_aggs, after_tallest), aes(x = left_lim, y = normalized_abund)) +
  geom_col(
    data = filter(ex_aggs, after_tallest == F),
    fill = gmri_cols("gmri blue"),
    alpha = 0.2,
    linewidth = 1) +
  geom_col(
    color = gmri_cols("gmri blue"),
    fill = gmri_cols("gmri blue"),
    alpha = 0.6,
    linewidth = 1) +
  geom_smooth(method = "lm", formula = y~x, color = gmri_cols("gmri blue"), se = F) +
  ggpmisc::stat_poly_eq(
    method = "lm",
    ggpmisc::use_label("eq"),
    formula = y ~ x,
    geom = "label", label.x = 7, label.y = -4) +
  scale_y_continuous(
    trans = transform_log(base = 2),
    labels = label_log(base = 2),
    breaks = 2^seq(-10, 16, 2)) +
  scale_x_continuous(
    labels = label_math(expr = 2^.x),
    breaks = c(0:15)) +
  scale_fill_gmri() +
  scale_color_manual(values = c("transparent", gmri_cols("lv orange"))) +
  facet_grid(area ~ season*est_year, scale = "free") +
  labs(y = "Number Density", x = "Body Weight")

# Stack the two panels with patchwork
plot1 / plot2
Dependent on decision making and when using the binned methodology, this same dataset could yield a slope of -.76 or a much steeper -1.91 when using a binned spectra methodology.
Passing these same minimum size cutoffs will similarly affect the slope parameters and the goodness-of-fit for the MLE methodology.
Code
# What do these look like with MLE fits?

#' @title Individual Size Distribution Plot Prep
#'
#' @description Prepares wmin_grams-style data to be plotted against the MLE
#' size spectrum slope fit. For every distinct (wmin_g, wmax_g) size bin it
#' computes the cumulative abundance of individuals at least that large, the
#' quantities used for sizeSpectra-style ISD plots.
#'
#' @param biomass_data Data prepped for mle estimation, use prep_wmin_wmax.
#'   Must contain wmin_g, wmax_g, and numlen_adj columns.
#' @param min_weight_g Minimum weight cutoff to use to match the bounded
#'   pareto minimum. Default 1.
#'
#' @return data.frame with one row per distinct size bin, ordered by
#'   descending wmin_g, with columns: wmin_g, wmax_g,
#'   countGTEwmin (total abundance with wmin >= this bin's wmin),
#'   lowCount (total abundance with wmin >= this bin's wmax),
#'   highCount (total abundance with wmax > this bin's wmin).
#' @export
isd_plot_prep <- function(biomass_data = databin_split, min_weight_g = 1){

  # Keep only the needed columns, arranged by descending lower weight
  dat <- biomass_data[, c("wmin_g", "wmax_g", "numlen_adj")]
  dat <- dat[order(dat$wmin_g, decreasing = TRUE), , drop = FALSE]

  # Truncate to match the fitted spectrum's xmin and drop missing/zero weights
  # (which() drops NA comparisons as well)
  keep <- dat$wmin_g >= min_weight_g &
    dat$wmin_g != 0 & !is.na(dat$wmin_g) &
    dat$wmax_g != 0 & !is.na(dat$wmax_g)
  dat <- dat[which(keep), , drop = FALSE]

  # Vectors of bin limits and their corresponding counts
  wmin.vec <- dat$wmin_g
  wmax.vec <- dat$wmax_g
  num.vec <- dat$numlen_adj

  # One row per distinct (wmin_g, wmax_g) bin, keeping first-seen order
  biomass_bins <- dat[!duplicated(dat[, c("wmin_g", "wmax_g")]),
                      c("wmin_g", "wmax_g"), drop = FALSE]

  # Cumulative abundance totals at/above each bin's limits:
  # 1. countGTEwmin: sizes with a lower limit at or above this bin's left lim
  # 2. lowCount:     sizes with a lower limit at or above this bin's right lim
  # 3. highCount:    sizes with an upper limit above this bin's left lim
  out_data <- data.frame(
    "wmin_g" = biomass_bins$wmin_g,
    "wmax_g" = biomass_bins$wmax_g,
    "countGTEwmin" = vapply(biomass_bins$wmin_g,
                            function(w) sum((wmin.vec >= w) * num.vec), numeric(1)),
    "lowCount" = vapply(biomass_bins$wmax_g,
                        function(w) sum((wmin.vec >= w) * num.vec), numeric(1)),
    "highCount" = vapply(biomass_bins$wmin_g,
                         function(w) sum((wmax.vec > w) * num.vec), numeric(1)))
  rownames(out_data) <- NULL

  return(out_data)
}
Code
# Take data without moving the min; get MLE results with wmin fixed at 1 g
mle_results_1g <- group_binspecies_spectra(
  ss_input = test_case,
  grouping_vars = c("area", "est_year", "season"),
  use_weight = T,
  abundance_vals = "numlen_adj",
  length_vals = "length_cm",
  isd_xmin = 1,
  isd_xmax = NULL,
  global_min = F,
  global_max = F,
  bin_width = 1,
  vdiff = 2)

# Control options: which fit and which data to plot below
mle_results <- mle_results_1g
actual_vals <- test_case %>% filter(wmin_g >= 1)
min_used <- mle_results$xmin_fit
max_used <- mle_results$xmax_fit

#### -- Plot setup below ---

# Power law parameters and summary details for the group of data
b.MLE <- mle_results$b
total_abundance <- mle_results$n
b.confMin <- mle_results$confMin
b.confMax <- mle_results$confMax

# min and max weights for power law predictions
xmin <- min_used
xmax <- max_used

# x values (individual bodymass) to predict across; break the x limits into
# small steps between min and max
x.PLB <- seq(from = xmin, to = xmax, by = 0.1)
x.PLB.length <- length(x.PLB)

# Predicted counts of individuals >= x from the bounded power law CDF
# (1 - pPLB), scaled by total abundance; repeated at the confidence limits
y.PLB <- (1 - sizeSpectra::pPLB(x = x.PLB, b = b.MLE, xmin = min(x.PLB), xmax = max(x.PLB))) * total_abundance
y.PLB.confMin <- (1 - sizeSpectra::pPLB(x = x.PLB, b = b.confMin, xmin = min(x.PLB), xmax = max(x.PLB))) * total_abundance
y.PLB.confMax <- (1 - sizeSpectra::pPLB(x = x.PLB, b = b.confMax, xmin = min(x.PLB), xmax = max(x.PLB))) * total_abundance

# Put it in a df to make plotting easier (trailing mutate() is a no-op)
PLB_df <- data.frame(
  x.PLB = x.PLB,
  y.PLB = y.PLB,
  confMin = y.PLB.confMin,
  confMax = y.PLB.confMax) %>%
  mutate()

# 2. Solve the power law function again in mutate for residuals; needs doing
# twice, once each for wmin_g and wmax_g. Aggregate across overlapping ranges.
p_prepped <- actual_vals %>%
  isd_plot_prep(min_weight_g = min_used) %>%
  left_join(actual_vals) %>%
  select(comname, wmin_g, wmax_g, lowCount, highCount, countGTEwmin) %>%
  # Get the residuals
  mutate(
    # Estimated Abundance
    wmin_estimate = (1 - sizeSpectra::pPLB(x = wmin_g, b = b.MLE, xmin = xmin, xmax = xmax)) * total_abundance,
    wmax_estimate = (1 - sizeSpectra::pPLB(x = wmax_g, b = b.MLE, xmin = xmin, xmax = xmax)) * total_abundance,
    # Residuals
    b_resid = countGTEwmin - ((wmin_estimate + wmax_estimate)/2))

##### Plotting Raw ####
# Observed cumulative abundances (points) against the fitted power law (line)
fit_1g <- ggplot() +
  geom_line(
    data = PLB_df, aes(x.PLB, y.PLB),
    color = gmri_cols("lv orange"), linewidth = 1) +
  geom_point(
    data = p_prepped %>% filter(lowCount > 0),
    aes(x = (wmin_g+wmax_g)/2, y = countGTEwmin),
    color = gmri_cols("gmri blue"),
    alpha = 0.65,
    shape = 1) +
  scale_y_continuous(
    trans = transform_log(base = 2),
    labels = label_log(base = 2),
    breaks = 2^seq(1, 16, 1),
    limits = c(2^0, 2^14),
    expand = expansion(add = c(0, 0))) +
  scale_x_continuous(
    trans = transform_log(base = 2),
    labels = label_log(base = 2),
    breaks = 2^seq(1, 12, 1),
    limits = c(2^0, 2^14),
    expand = expansion(add = c(0, 0))) +
  labs(
    x = "Weight (g)", y = "Abundance",
    title = "GB - Fall - 1996:\nWMIN = 1g",
    subtitle = str_c("MLE Slope Param: ", round(mle_results$b, 2)))
Code
# Take data WITH the moved minimum; the cutoff based on the peak bin is 32 g.
# NOTE(review): this chunk is a near-verbatim copy of the 1 g chunk above —
# a shared helper function would remove the duplication.
mle_results_32g <- group_binspecies_spectra(
  ss_input = filter(test_case, wmin_g >= 2^5),
  grouping_vars = c("area", "est_year", "season"),
  use_weight = T,
  abundance_vals = "numlen_adj",
  length_vals = "length_cm",
  isd_xmin = 32,
  isd_xmax = NULL,
  global_min = F,
  global_max = F,
  bin_width = 1,
  vdiff = 2)

# Control options: which fit and which data to plot below
mle_results <- mle_results_32g
actual_vals <- test_case %>% filter(wmin_g >= 2^5)
min_used <- mle_results$xmin_fit
max_used <- mle_results$xmax_fit

#### -- Plot setup below ---

# Power law parameters and summary details for the group of data
b.MLE <- mle_results$b
total_abundance <- mle_results$n
b.confMin <- mle_results$confMin
b.confMax <- mle_results$confMax

# min and max weights for power law predictions
xmin <- min_used
xmax <- max_used

# x values (individual bodymass) to predict across; break the x limits into
# small steps between min and max
x.PLB <- seq(from = xmin, to = xmax, by = 0.1)
x.PLB.length <- length(x.PLB)

# Predicted counts of individuals >= x from the bounded power law CDF
# (1 - pPLB), scaled by total abundance; repeated at the confidence limits
y.PLB <- (1 - sizeSpectra::pPLB(x = x.PLB, b = b.MLE, xmin = min(x.PLB), xmax = max(x.PLB))) * total_abundance
y.PLB.confMin <- (1 - sizeSpectra::pPLB(x = x.PLB, b = b.confMin, xmin = min(x.PLB), xmax = max(x.PLB))) * total_abundance
y.PLB.confMax <- (1 - sizeSpectra::pPLB(x = x.PLB, b = b.confMax, xmin = min(x.PLB), xmax = max(x.PLB))) * total_abundance

# Put it in a df to make plotting easier (trailing mutate() is a no-op)
PLB_df <- data.frame(
  x.PLB = x.PLB,
  y.PLB = y.PLB,
  confMin = y.PLB.confMin,
  confMax = y.PLB.confMax) %>%
  mutate()

# 2. Solve the power law function again in mutate for residuals; needs doing
# twice, once each for wmin_g and wmax_g. Aggregate across overlapping ranges.
p_prepped <- actual_vals %>%
  isd_plot_prep(min_weight_g = min_used) %>%
  left_join(actual_vals) %>%
  select(comname, wmin_g, wmax_g, lowCount, highCount, countGTEwmin) %>%
  # Get the residuals
  mutate(
    # Estimated Abundance
    wmin_estimate = (1 - sizeSpectra::pPLB(x = wmin_g, b = b.MLE, xmin = xmin, xmax = xmax)) * total_abundance,
    wmax_estimate = (1 - sizeSpectra::pPLB(x = wmax_g, b = b.MLE, xmin = xmin, xmax = xmax)) * total_abundance,
    # Residuals
    b_resid = countGTEwmin - ((wmin_estimate + wmax_estimate)/2))

##### Plotting Raw ####
# Observed cumulative abundances (points) against the fitted power law (line)
fit_32g <- ggplot() +
  geom_line(
    data = PLB_df, aes(x.PLB, y.PLB),
    color = gmri_cols("lv orange"), linewidth = 1) +
  geom_point(
    data = p_prepped %>% filter(lowCount > 0),
    aes(x = (wmin_g+wmax_g)/2, y = countGTEwmin),
    color = gmri_cols("gmri blue"),
    alpha = 0.65,
    shape = 1) +
  scale_y_continuous(
    trans = transform_log(base = 2),
    labels = label_log(base = 2),
    breaks = 2^seq(1, 16, 1),
    limits = c(2^0, 2^14),
    expand = expansion(add = c(0, 0))) +
  scale_x_continuous(
    trans = transform_log(base = 2),
    labels = label_log(base = 2),
    breaks = 2^seq(1, 12, 1),
    limits = c(2^0, 2^14),
    expand = expansion(add = c(0, 0))) +
  labs(
    x = "Weight (g)", y = "Abundance",
    title = "GB - Fall - 1996:\nWMIN = 32g",
    subtitle = str_c("MLE Slope Param: ", round(mle_results$b, 2)))
Code
# Stack the two MLE fit panels (patchwork): 1 g minimum above, 32 g minimum below
fit_1g / fit_32g
If using the MLE methodology, the difference in the exponent parameters returned by shifting the wmin parameter is the difference between -0.94 & -1.53.
In both cases, abundance information in the center of the size distribution (16-256g) is poorly fit when including abundance information from smaller body sizes. This tradeoff is problematic, because we are in some sense accommodating data at small sizes that are known to be under- or inconsistently sampled by the sampling gear, at the expense of information from larger size classes which are better sampled.
Selectivity/Catchability and Abundance Density Peaks
Why the peak abundance (or near peak) as minimum size threshold?
It is widely accepted that below a certain size fishes may simply escape through the mesh and avoid sampling, while larger individuals may out-swim or avoid the gear altogether. Together these create a dome-shaped catchability curve with some intermediate size having the highest catchability.
Data collected from gears with catchability curves are prone to systematic under-sampling of smaller individuals that are poorly selected and individuals large enough to avoid the gear.
These relationships will vary species-to-species, but broadly speaking this will be true.
Code
# Conceptual figure: fade the under-sampled small-size bins, annotate why they
# fall below expectation, and overlay the descending slope fit (-1.91 from the
# peak-onward fit above) through the well-sampled bins.
ggplot(filter(ex_aggs, after_tallest), aes(x = left_lim, y = normalized_abund)) +
  geom_col(
    data = filter(ex_aggs, after_tallest == F),
    fill = gmri_cols("gmri blue"),
    alpha = 0.2,
    linewidth = 1) +
  geom_col(
    color = gmri_cols("gmri blue"),
    fill = gmri_cols("gmri blue"),
    alpha = 0.6,
    linewidth = 1) +
  geom_text(
    data = data.frame(
      x = 2, y = 164,
      label = "Under-representation\nof small body sizes\n b/c differences\n in catchability"),
    aes(x, y, label = label),
    color = gmri_cols("gmri blue")) +
  geom_abline(
    slope = -1.91, intercept = 17,
    color = gmri_cols("gmri blue"), linewidth = 1) +
  scale_y_continuous(
    trans = transform_log(base = 2),
    labels = label_log(base = 2),
    breaks = 2^seq(-10, 18, 2),
    expand = expansion(add = c(0, 2^3))) +
  scale_x_continuous(
    labels = label_math(expr = 2^.x),
    breaks = c(0:15)) +
  scale_fill_gmri() +
  scale_color_manual(values = c("transparent", gmri_cols("lv orange"))) +
  facet_grid(area ~ season*est_year, scale = "free") +
  labs(y = "Number Density", x = "Body Weight")
Peak Location Across Years
Here are where all of the peaks exist for each group viewed as an animation:
Code
# Get the plotting summaries: bin aggregates plus the peak (tallest) bin flag
# for every area x season x year group. Groups are packed into a single
# "groupvar" string (separator "XX"), split, processed, then unpacked again.
wig_l2_aggs <- wig_l2 %>%
  unite("groupvar", area, season, est_year, sep = "XX") %>%
  split(.$groupvar) %>%
  map_dfr(function(x){
    # Get aggregates
    l2_aggs <- aggregate_log2_bins(x, bin_increment = 1)
    # Locate tallest (peak normalized abundance) bin's left limit
    tallest <- l2_aggs %>%
      arrange(desc(normalized_abund)) %>%
      slice(1) %>%
      pull(left_lim)
    # Add info back to aggregates
    l2_aggs <- l2_aggs %>%
      mutate(is_tallest = if_else(left_lim == tallest, T, F))
    # Return
    return(l2_aggs)
  }, .id = "groupvar") %>%
  separate(groupvar, into = c("area", "season", "est_year"), sep = "XX") %>%
  mutate(
    area = factor(area, levels = area_levels_long_all),
    season = factor(season, levels = c("Spring", "Fall")))

# gganimate code that produced the GIF below — kept (commented) for
# reproducibility; re-run to regenerate the file.
# animated_plot <- ggplot(wig_l2_aggs) +
#   geom_col(
#     aes(x = left_lim, y = normalized_abund,
#         fill = season, color = is_tallest),
#     alpha = 0.6, linewidth = 1.5) +
#   scale_y_continuous(
#     trans = transform_log(base = 2),
#     labels = label_log(base = 2),
#     breaks = 2^seq(-10,16, 2)) +
#   scale_x_continuous(labels = label_math(expr = 2^.x), breaks = c(0:15)) +
#   scale_fill_gmri() +
#   scale_color_manual(values = c("transparent", "black")) +
#   facet_grid(area ~ season, scale = "free") +
#   labs(
#     y = "Abundance Density", x = "Bodyweight (g)",
#     title = 'Year: {closest_state}', # Dynamic title for year
#     fill = "Season") +
#   transition_states(est_year, transition_length = 1, state_length = 5) +
#   ease_aes('linear') # Smooth transition
#
# # Animate and save
# animate(animated_plot, nframes = 300, fps = 10, width = 1000, height = 800)
# anim_save(here::here("faceted_tallest_bin_spectra_wigley.gif"), animated_plot)

# Show the GIF
knitr::include_graphics(here::here("faceted_tallest_bin_spectra_wigley.gif"))
Bodymass spectra abundance peaks
From this animation we can see that the placement and variability in the location of peak abundance densities is different seasonally and between the different regions.
Looking at Individual Years w/ Observable
I like having the ability to look at them one-by-one, but don’t want to make 800 plots or tabs. I’m using this exercise as an excuse to use observable for interactive selections.
Across these plots it is possible to see many instances where the actual individual size distribution does not show the characteristic shape we’ve come to expect for size spectra.
Code
# Reformat years as a date - messes with slider, so left disabled
# year_avgs <- mutate(year_avgs, est_year = as.Date(str_c(est_year, "-06-01")))

# Expose the aggregated bins to the Observable JS cells below,
# sorted by year so the slider steps through them in order
ojs_define(data = wig_l2_aggs %>% arrange(est_year))
# Alternative: drop near-empty bins first
# data = filter(wig_l2_aggs, normalized_abund > 1) %>% arrange(est_year)
Code
// Year slider for the interactive ISD views (survey years 1970-2019)
viewof yearSlider = Inputs.range(
  [1970, 2019], // If not pulling from the data
  {
    label: "Year:",
    value: 1970, // If not pulling from the data
    step: 1 // Step size for numeric slider
  }
)

// Area selector; season is handled by the two faceted plots below instead
// viewof seasonSelect = Inputs.select(["Spring", "Fall"], { label: "Season" })
viewof areaSelect = Inputs.select(
  ["Northeast Shelf", "Gulf of Maine", "Georges Bank", "Southern New England", "Mid-Atlantic Bight"],
  { label: "Area" })

// // Single-season filtering function kept for reference:
// filteredData = transpose(data).filter(function(d) {
//   return yearSlider == d.est_year && d.area === areaSelect;
// })

// Spring and Fall subsets of the R-exported data for the selected year/area
filteredSpring = transpose(data).filter(function(d) {
  return yearSlider == d.est_year && d.area === areaSelect && d.season === "Spring";
})
filteredFall = transpose(data).filter(function(d) {
  return yearSlider == d.est_year && d.area === areaSelect && d.season === "Fall";
})
Code
// Plot with FacetsPlot.plot({facet: {data: filteredSpring,x: d => d.area,// Columns: areay: d => d.season// Rows: season//y: { field: "season", domain: ["Spring", "Fall"] } // Custom row order - doesn't work },marks: [ Plot.barY(filteredSpring, { x:"left_lim",y:"normalized_abund",fill:"#00608a",//stroke: "season",stroke: d => d.is_tallest?"black":"none",// Change stroke color for highlighted barsstrokeWidth: d => d.is_tallest?4:0// Thicker stroke for highlighted bars }) ],x: {label:"Body Mass (g)",tickFormat: d =>`2^${d}`// Format labels as powers of 2 },y: {label:"Abundance",type:"symlog",base:2 },style: {facetPadding:5 }})
Code
// Plot with Facets: fall size distribution for the selected year/area
Plot.plot({
  facet: {
    data: filteredFall,
    x: d => d.area,   // Columns: area
    y: d => d.season  // Rows: season
    //y: { field: "season", domain: ["Spring", "Fall"] } // Custom row order - doesn't work
  },
  marks: [
    Plot.barY(filteredFall, {
      x: "left_lim",
      y: "normalized_abund",
      fill: "#ea4f12",
      //stroke: "season",
      stroke: d => d.is_tallest ? "black" : "none", // Change stroke color for highlighted bars
      strokeWidth: d => d.is_tallest ? 4 : 0        // Thicker stroke for highlighted bars
    })
  ],
  x: {
    label: "Body Mass (g)",
    tickFormat: d => `2^${d}` // Format labels as powers of 2
  },
  y: {
    label: "Abundance",
    type: "symlog",
    base: 2
  },
  style: {
    facetPadding: 5
  }
})
Peak Location Frequencies
Without over-doing the peak locations for multi-modal situations, here are the peak locations and their frequencies for each group:
From this dataframe we can strip out the key group information and the minimum size as a lookup table.
This can be joined and used as a filter as a preparatory step before fitting the group spectra. Then we can re-run the MLE estimation of the slope parameters and see how they have changed.
Code
# Make the key: one row per area/season/year with the peak-bin minimum size
# (2^left_lim converts the log2 bin edge to grams)
min_size_key <- wig_l2_aggs %>%
  filter(is_tallest) %>%
  distinct(area, season, est_year, left_lim, is_tallest) %>%
  mutate(
    min_cutoff_g = 2^left_lim,
    est_year = as.numeric(est_year)) %>%
  select(-left_lim)

# # Save them
# write_csv(min_size_key, here::here("Data/processed/wigley_species_l2peaks_key.csv"))
Results with Minimum Sizes Applied
Code
# Re-ran spectra with net minima
# Moved this into 02_estimate_mleBin_Spectra.R to go with the rest of pipeline

# Load the saved key
min_size_key <- read_csv(here::here("Data/processed/wigley_species_l2peaks_key.csv"))

# Load the data fed to spectra fit after those are applied
wigley_in_new <- read_csv(
  here::here("Data/processed/wigley_species_trawl_wmin_filtered.csv")) %>%
  mutate(
    area = factor(area, levels = area_levels_long_all),
    season = factor(season, levels = c("Spring", "Fall")))

# Load the spectra results
# Moved this into 02_estimate_mleBin_Spectra.R to go with the rest of pipeline
moving_peak_spectra <- read_csv(
  here::here("Data/processed/wigley_species_l2peaks_bmspectra.csv")) %>%
  mutate(
    area = factor(area, levels = area_levels_long_all),
    season = factor(season, levels = c("Spring", "Fall")))

# # Plot the range of sizes that they span
# wigley_in_new %>%
#   group_by(area, season, est_year) %>%
#   summarise(
#     wmin = min(wmin_g, na.rm = T),
#     wmax = max(wmin_g, na.rm = T),
#     .groups = "drop") %>%
#   ggplot(aes(x = wmin, xend = wmax, y = est_year, yend = est_year, color = season)) +
#   geom_segment() +
#   scale_color_gmri() +
#   facet_grid(area~season) +
#   scale_x_continuous(labels = label_number(big.mark = ",", scale = 0.001)) +
#   labs(x = "Weight (kg)", title = "Size Ranges Fed to Spectra Fitting")
Code
# Load the prepared dataset from 03_merge_results_to_covariates.csv
# has seasonal bt and annual landings added
peak_chase_model_df <- read_csv(
  here::here("Data/model_ready/wigley_community_shifting_bmspectra_mod.csv")) %>%
  mutate(
    area = factor(area, levels = area_levels_long_all),
    season = factor(season, levels = c("Spring", "Fall")))
Code
# Function to get the predictions from a fitted model and flag which
# area/season groups have a year-trend whose CI excludes zero.
#
# mod_x: a gls/lme-style model containing est_year * area * season terms.
# Returns: the ggpredict() prediction frame with area/season factors set and
#   a `non_zero` logical column (TRUE when the emtrends CI excludes zero).
get_preds_and_trendsignif <- function(mod_x){

  # Model predictions across year/area/season
  modx_preds <- as.data.frame(
    ggpredict(mod_x, terms = ~ est_year * area * season)) %>%
    mutate(
      area = factor(group, levels = area_levels_long_all),
      season = factor(facet, levels = c("Spring", "Fall")))

  # Marginal trend in est_year for each area/season combination;
  # non_zero is TRUE when 0 falls outside [lower.CL, upper.CL]
  modx_emtrend <- emtrends(
    object = mod_x,
    specs = ~ area * season,
    var = "est_year") %>%
    as_tibble() %>%
    mutate(non_zero = !(lower.CL <= 0 & upper.CL >= 0))

  # Preds with signif flag attached; join keys made explicit
  modx_preds %>%
    left_join(
      select(modx_emtrend, area, season, non_zero),
      by = c("area", "season"))
}
Once the size distribution has been truncated according to the approach above, we now see some different long-term trends emerge. Most regions now show stability in the community size distribution over time, with Spring Georges Bank showing long-term steepening, and Spring MAB showing a shallowing trend over time.
Code
# Model linear trends in time (AR1 errors within each area/season series)
moving_peak_spectra <- moving_peak_spectra %>%
  mutate(
    area = factor(area, levels = area_levels_long_all),
    est_year = as.numeric(est_year))

spectra_trend_mod <- gls(
  b ~ scale(est_year) * area * season,
  correlation = corAR1(form = ~ est_year | area/season),
  data = moving_peak_spectra)

#check_model(spectra_trend_mod)
# plot(check_collinearity(spectra_trend_mod))

# Get the predictions and flag whether they are significant
bmspectra_trend_predictions <- get_preds_and_trendsignif(spectra_trend_mod) %>%
  mutate(
    metric = "bodymass_spectra",
    area = factor(area, levels = area_levels_long_all))

# Subset used by both the ribbon and the trend-line layers
bmspectra_signif <- filter(bmspectra_trend_predictions, non_zero == TRUE)

# Make the plot
bmspectra_trend_p <- ggplot() +
  geom_ribbon(
    data = bmspectra_signif,
    aes(x, ymin = conf.low, ymax = conf.high, fill = season),
    linewidth = 1, alpha = 0.35) +
  geom_point(
    data = moving_peak_spectra,
    aes(est_year, b, color = season),
    size = 0.8, alpha = 0.5) +
  geom_ma(
    data = moving_peak_spectra,
    aes(est_year, b, color = season, linetype = "5-Year Moving Average"),
    n = 5, ma_fun = SMA, key_glyph = "timeseries") +
  geom_line(
    data = bmspectra_signif,
    aes(x, predicted, color = season, linetype = "Significant Trend"),
    linewidth = 1, alpha = 1) +
  scale_fill_gmri() +
  scale_color_gmri() +
  facet_grid(area ~ ., scales = "free") +
  labs(
    title = "Trends in Spectra w/ Shifting wmin",
    x = "Year",
    y = "Size Distribution Exponent",
    color = "Season",
    fill = "Season",
    linetype = "Trend")

# Show plot
bmspectra_trend_p
All Trends Figure
Spectra trends in the context of overall median size and temperature trends.
Code
# Load the median weight/length data
wigley_size_df <- read_csv(here::here("Data/processed/wigley_species_size_summary.csv"))

# shelf-wide summary
wigley_sizes_shelf <- read_csv(here::here("Data/processed/shelfwide_wigley_species_size_summary.csv"))

# Join them
size_results <- bind_rows(wigley_size_df, wigley_sizes_shelf) %>%
  mutate(
    community = "Wigley Subset",
    survey_area = factor(survey_area, levels = area_levels_all)) %>%
  left_join(area_df)

# Get trends:
wt_mod_wig <- gls(
  med_wt_kg ~ scale(est_year) * area * season,
  correlation = corAR1(form = ~ est_year | area/season),
  data = drop_na(size_results, med_wt_kg))

# Get the predictions and flag whether they are significant
size_trend_predictions <- get_preds_and_trendsignif(wt_mod_wig) %>%
  mutate(model_id = "med_wt_kg-Wigley Subset") %>%
  separate(model_id, into = c("metric", "community"), sep = "-") %>%
  mutate(metric = fct_rev(metric))

# Median length - all species
size_long <- size_results %>%
  pivot_longer(
    cols = c(med_len_cm, med_wt_kg),
    names_to = "metric",
    values_to = "val") %>%
  mutate(
    survey_area = fct_relevel(survey_area, area_levels_all),
    area = fct_relevel(area, area_levels_long_all),
    season = fct_rev(season))

# Median weight - wigley species
# weight plots: significant-trend subset shared by ribbon + line layers
wt_signif <- filter(size_trend_predictions, metric == "med_wt_kg", non_zero == TRUE)

wt_plot <- size_long %>%
  filter(metric == "med_wt_kg") %>%
  drop_na(val) %>%
  ggplot() +
  geom_ribbon(
    data = wt_signif,
    aes(x, ymin = conf.low, ymax = conf.high, fill = season),
    linewidth = 1, alpha = 0.35) +
  geom_point(aes(est_year, val, color = season), size = 0.8, alpha = 0.5) +
  geom_ma(
    aes(est_year, val, color = season, linetype = "5-Year Moving Average"),
    n = 5, ma_fun = SMA, key_glyph = "timeseries") +
  geom_line(
    data = wt_signif,
    aes(x, predicted, color = season, linetype = "Significant Trend"),
    linewidth = 1, alpha = 1) +
  scale_fill_gmri() +
  scale_color_gmri() +
  facet_grid(area ~ ., scales = "free") +
  labs(
    x = "Year",
    y = "Median Weight (kg)",
    color = "Season",
    fill = "Season",
    linetype = "Trend")
These timeseries are in-essence annual check-ins of the multi-species, multi-cohort community size structure in an area. There is some reason to think that there should be some memory of the previous six months and/or the previous year. Similarly, the individual size distribution of the community may respond to external factors with some time-lag.
I have been informed that autocorrelation in the spectra slopes themselves may not be a safe assumption to make, but here are ACF plots regardless.
Code
# Pull out acf stuff: lagged autocorrelation of the spectra exponent b,
# computed separately for each area/season series, with the +/- 95%
# significance threshold attached to each row.
spectra_acf <- moving_peak_spectra %>%
  unite("regseas", area, season, sep = "XX") %>%
  split(.$regseas) %>%
  map_dfr(function(x){
    # drop NA and order the series in time before taking the ACF
    x <- arrange(x, est_year) %>% drop_na(b)
    # Do ACF
    x_acf <- acf(x$b, plot = FALSE, lag.max = 10)
    # Get the signif:
    # https://www.squaregoldfish.co.uk/programming/r_acf_significance.md/
    # Not 100% sure n is the same for ccf as it is for acf, but...
    significance_level <- qnorm((1 + 0.95) / 2) / sqrt(sum(!is.na(x$b)))
    data.frame(
      "acf"    = x_acf$acf,
      "lag"    = x_acf$lag,
      "sigpos" = significance_level,
      "signeg" = significance_level * -1)
  }, .id = "regseas") %>%
  separate("regseas", into = c("area", "season"), sep = "XX") %>%
  mutate(
    # Flag when it crosses the significance threshold
    sig_flag = acf < signeg | acf > sigpos,
    # Set Factor Levels
    area = factor(area, levels = area_levels_long_all),
    season = factor(season, levels = c("Spring", "Fall")))

# Plot things
ggplot(spectra_acf, aes(lag, acf)) +
  geom_hline(yintercept = 0, color = "gray25", linewidth = 1) +
  geom_vline(xintercept = 0, color = "gray25", linewidth = 1) +
  geom_ribbon(
    aes(ymin = signeg, ymax = sigpos),
    alpha = 0.2, linetype = 2, fill = "lightgray", color = "gray65") +
  geom_col(alpha = 0.65) +
  geom_col(data = filter(spectra_acf, sig_flag), color = "black") +
  scale_x_continuous(
    limits = c(-1, 10),
    breaks = c(1:9),
    expand = expansion(add = c(0, 0))) +
  facet_grid(area ~ season) +
  labs(
    title = "Spectra Slope ACF",
    x = "Lag (years)",
    y = "ACF")
Based on the auto-correlation function results we have a similar outlook on the annual autocorrelations as we did before shifting wmin: there is not very much evidence of long-term memory in the size distribution.
Spring and fall communities from the same area remain uncorrelated. This is surprising to me as the noise coming from the presence/absence of cohorts of smaller individuals should be reduced.
Code
# Check whether the fitted exponent b tracks the realized minimum size
# (a strong relationship would suggest the wmin shift biases the fit)
moving_peak_spectra %>%
  ggplot(aes(xmin_actual, b)) +
  geom_point() +
  geom_smooth(method = "lm") +
  ggpmisc::stat_poly_eq(ggpmisc::use_label(c("P", "R2")), formula = y ~ x) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.5))) +
  scale_x_continuous(
    trans = transform_log(base = 2),
    labels = label_log(base = 2)) +
  facet_wrap(~area, ncol = 2, scales = "free") +
  labs(title = "Bias from Shifting the Distribution's wmin")
Bottom Temperature CCF
Shelf-Wide:
Spectra lead any significant bottom temperature relationships.
Regionally:
Seems centered on the same year over the different groups, not seeing a progression that would point to an important lag effect that would work for all areas.
This is not surprising to me. When we build SDM’s and temperature is an important factor we are explicitly making a model where individuals shift their distribution in space-time based on the observed temperature at that time and place.
Code
# Function to grab the correlation data and lag data.
# x, y: numeric time series; lagmax: maximum lag to evaluate.
# Returns a data.frame of cross-correlations by lag with +/- 95% bounds.
get_ccf_vector <- function(x, y, lagmax = 10){
  # Run the ccf
  ccf_data <- ccf(x, y, plot = FALSE, lag.max = lagmax)
  # Get the signif:
  # https://www.squaregoldfish.co.uk/programming/r_acf_significance.md/
  # Not 100% sure n is the same for ccf as it is for acf, but...
  significance_level <- qnorm((1 + 0.95) / 2) / sqrt(sum(!is.na(x)))
  data.frame(
    "acf"    = ccf_data$acf,
    "lag"    = ccf_data$lag,
    "sigpos" = significance_level,
    "signeg" = significance_level * -1)
}

# Run Local CCF on Bottom Temperature
btemp_ccf <- peak_chase_model_df %>%
  unite(col = "regseas", area, season, sep = "XX") %>%
  split(.$regseas) %>%
  map_dfr(~ get_ccf_vector(x = .x$bot_temp, y = .x$b), .id = "regseas") %>%
  separate(col = "regseas", into = c("area", "season"), sep = "XX") %>%
  mutate(
    # Flag when it crosses threshold
    sig_flag = acf < signeg | acf > sigpos,
    # Set Factor Levels
    area = factor(area, levels = area_levels_long_all),
    season = factor(season, levels = c("Spring", "Fall")))

# Plot it
ggplot(btemp_ccf, aes(lag, acf)) +
  geom_hline(yintercept = 0, color = "gray25", linewidth = 1) +
  geom_vline(xintercept = 0, color = "gray25", linewidth = 1) +
  geom_ribbon(
    aes(ymin = signeg, ymax = sigpos),
    alpha = 0.2, linetype = 2, fill = "lightgray", color = "gray65") +
  geom_col(alpha = 0.65) +
  geom_col(data = filter(btemp_ccf, sig_flag), color = "black") +
  scale_x_continuous(expand = expansion(add = c(0, 0))) +
  facet_grid(area ~ season) +
  labs(
    title = "Bottom Temperature CCF",
    x = "Bottom Temperature Lag (years)",
    y = "ACF")
Landings CCF
It's plausible that landings have a lagged effect, so here are the CCFs for that.
The CCF results for landings appear to show that the spectra in GOM lead the landings. This makes sense with regards to regulatory cutbacks.
In Georges Bank there are significant negative relationships 9 & 10 years out. This aligns better in direction with the mechanism that removing large fish steepens spectra, but a decade’s lead time is long for a mechanism that is essentially instantaneous.
SNE spring shows what looks more supportive.
MAB Spring is either centered or spectra are leading the landings.
Code
# Run Local CCF on Total Landings (comment previously said "Bottom
# Temperature" -- copy/paste carryover from the btemp_ccf chunk)
landings_ccf <- peak_chase_model_df %>%
  drop_na(total_weight_lb) %>%
  unite(col = "regseas", area, season, sep = "XX") %>%
  split(.$regseas) %>%
  map_dfr(~ get_ccf_vector(x = .x$total_weight_lb, y = .x$b), .id = "regseas") %>%
  separate(col = "regseas", into = c("area", "season"), sep = "XX") %>%
  mutate(
    # Flag when it crosses threshold
    sig_flag = acf < signeg | acf > sigpos,
    # Set Factor Levels
    area = factor(area, levels = area_levels_long_all),
    season = factor(season, levels = c("Spring", "Fall")))

ggplot(landings_ccf, aes(lag, acf)) +
  geom_hline(yintercept = 0, color = "gray25", linewidth = 1) +
  geom_vline(xintercept = 0, color = "gray25", linewidth = 1) +
  geom_ribbon(
    aes(ymin = signeg, ymax = sigpos),
    alpha = 0.2, linetype = 2, fill = "lightgray", color = "gray65") +
  geom_col(alpha = 0.65) +
  geom_col(data = filter(landings_ccf, sig_flag), color = "black") +
  scale_x_continuous(expand = expansion(add = c(0, 0))) +
  facet_grid(area ~ season) +
  labs(
    title = "Total Landings CCF",
    x = "Total Landings Lag (years)",
    y = "ACF")
Modeling Relationships with Large External Factors
Here I could explore further whether relationships to bottom temperature or landings exist, but I remain unconvinced this is the right path forward.
Landings and Temperature Global Model
The following results are from a linear model with both scaled log(landings) (the annual total) and the seasonal mean bottom temperatures for each area.
Code
# Just drop the random effect part for now
full_model_ar <- nlme::gls(
  # Model
  model = b ~ area * season * scale(bot_temp) + area * scale(log(total_weight_lb)),
  # Data
  data = peak_chase_model_df %>% drop_na(total_weight_lb),
  # Autocorrelation
  correlation = corAR1(form = ~ est_year | area/season))

# Check the model
# check_model(full_model_ar)

# # Collinearity without interactions
# plot(
#   check_collinearity(
#     nlme::gls(
#       b ~ area + season + scale(bot_temp) + scale(log_land),
#       data = drop_na(wigley_bmspectra_model_df, total_weight_lb) %>%
#         filter(area != "Northeast Shelf") %>%
#         mutate(log_land = log(total_weight_lb)),
#       correlation = corAR1(form = ~ est_year | survey_area/season)
#     )
#   )
# )

# # confidence intervals on phi
# intervals(full_model_ar)$corStruct

# # Summary Table
tbl_regression(full_model_ar)
Characteristic
Beta
95% CI
1
p-value
(Intercept)
-1.3
-1.6, -0.90
<0.001
areaGulf of Maine
0.05
-0.42, 0.52
0.8
areaGeorges Bank
-0.37
-0.89, 0.16
0.2
areaSouthern New England
-0.01
-0.43, 0.41
>0.9
areaMid-Atlantic Bight
0.05
-0.32, 0.43
0.8
seasonFall
-0.15
-0.47, 0.16
0.3
scale(bot_temp)
0.08
-0.24, 0.40
0.6
scale(log(total_weight_lb))
0.03
-0.24, 0.30
0.9
areaGulf of Maine:seasonFall
0.31
-0.16, 0.77
0.2
areaGeorges Bank:seasonFall
0.68
0.16, 1.2
0.011
areaSouthern New England:seasonFall
0.00
-0.45, 0.45
>0.9
areaMid-Atlantic Bight:seasonFall
-0.44
-0.93, 0.05
0.077
areaGulf of Maine:scale(bot_temp)
0.00
-0.48, 0.47
>0.9
areaGeorges Bank:scale(bot_temp)
-0.21
-0.68, 0.26
0.4
areaSouthern New England:scale(bot_temp)
0.03
-0.37, 0.42
0.9
areaMid-Atlantic Bight:scale(bot_temp)
0.21
-0.17, 0.60
0.3
seasonFall:scale(bot_temp)
-0.01
-0.43, 0.41
>0.9
areaGulf of Maine:scale(log(total_weight_lb))
0.16
-0.26, 0.58
0.5
areaGeorges Bank:scale(log(total_weight_lb))
-0.28
-0.74, 0.18
0.2
areaSouthern New England:scale(log(total_weight_lb))
areaSouthern New England:seasonFall:scale(bot_temp)
-0.17
-0.70, 0.36
0.5
areaMid-Atlantic Bight:seasonFall:scale(bot_temp)
-0.42
-0.93, 0.08
0.10
1
CI = Confidence Interval
From this model we see that an effect of bottom temperature on the size distribution is only seen in the Spring-MAB data. The following is the marginal-effects plot of the data with that relationship overlayed.
With increasing bottom temperature, the Spring-MAB size distribution has become less steep.
Code
# Clean up the plot:
# Plot the predictions over data
temp_preds <- as.data.frame(
  ggpredict(full_model_ar, terms = ~ bot_temp * area * season)) %>%
  mutate(
    area = factor(group, levels = area_levels_long_all),
    season = factor(facet, levels = c("Spring", "Fall")))

#### Trend Posthoc ####
# Flag effect fits that are non-significant:
# non_zero is TRUE when the bot_temp slope CI excludes zero
temp_emtrend <- emtrends(
  object = full_model_ar,
  specs = ~ area * season,
  mode = "appx-satterthwaite",
  var = "bot_temp") %>%
  as_tibble() %>%
  mutate(non_zero = !(lower.CL <= 0 & upper.CL >= 0))

# Limit temperature plotting range to +/- 2C around each group's observations
actual_range <- peak_chase_model_df %>%
  filter(area != "Northeast Shelf") %>%
  group_by(season, area) %>%
  summarise(
    min_temp = min(bot_temp) - 2,
    max_temp = max(bot_temp) + 2,
    .groups = "drop")

# Plot over observed data; only groups with a significant slope keep a line
local_btemp <- temp_preds %>%
  left_join(actual_range) %>%
  filter(x >= min_temp, x <= max_temp) %>%
  left_join(select(temp_emtrend, area, season, non_zero)) %>%
  filter(non_zero) %>%
  ggplot() +
  geom_ribbon(
    aes(x, ymin = conf.low, ymax = conf.high, group = season),
    alpha = 0.1) +
  geom_line(aes(x, predicted, color = season), linewidth = 1) +
  geom_point(
    data = peak_chase_model_df,
    aes(bot_temp, b, color = season),
    alpha = 0.4, size = 1) +
  facet_grid(area ~ ., scales = "free", labeller = label_wrap_gen(width = 8)) +
  scale_color_gmri() +
  scale_x_continuous(labels = label_number(suffix = deg_c)) +
  labs(
    y = "Mass distribution Exponent",
    x = "Seasonal Bottom Temperature")

local_btemp
From this model we see no effect of total landings on the size distribution.
Code
# Clean up the plot:
# Plot the predictions over data
f_preds <- as.data.frame(
  ggpredict(full_model_ar, terms = ~ total_weight_lb * area * season)) %>%
  mutate(
    area = factor(group, levels = area_levels_long_all),
    season = factor(facet, levels = c("Spring", "Fall")))

#### Trend Posthoc ####
# Flag effect fits that are non-significant:
# non_zero is TRUE when the landings slope CI excludes zero
f_emtrend <- emtrends(
  object = full_model_ar,
  specs = ~ area,
  var = "total_weight_lb",
  mode = "appx-satterthwaite") %>%
  as_tibble() %>%
  mutate(non_zero = !(lower.CL <= 0 & upper.CL >= 0))

# Limit plotting range around each group's observed landings
# NOTE(review): the +/- 2 buffer was copied from the temperature chunk;
# on a pounds scale it is effectively no buffer -- confirm intent
actual_range <- peak_chase_model_df %>%
  group_by(season, area) %>%
  summarise(
    min_f = min(total_weight_lb) - 2,
    max_f = max(total_weight_lb) + 2,
    .groups = "drop")

# Plot over observed data
# (x-axis label fixed: previously said "Local Seasonal Bottom Temperature
# Anomaly", carried over from the temperature plot)
landings_ar_plot <- f_preds %>%
  left_join(actual_range) %>%
  filter(x >= min_f, x <= max_f) %>%
  left_join(select(f_emtrend, area, non_zero)) %>%
  filter(non_zero) %>%
  ggplot() +
  geom_ribbon(
    aes(x, ymin = conf.low, ymax = conf.high, group = season),
    alpha = 0.1) +
  geom_line(aes(x, predicted, color = season), linewidth = 1) +
  geom_point(
    data = peak_chase_model_df,
    aes(total_weight_lb, b, color = season),
    alpha = 0.4, size = 1) +
  facet_grid(area ~ ., scales = "free", labeller = label_wrap_gen(width = 8)) +
  scale_color_gmri() +
  scale_x_log10(labels = label_log(base = 10), limits = 10^c(0, 10)) +
  labs(
    y = "Exponent of ISD",
    title = "Total Landings (lb.)",
    x = "Total Landings (lb.)")

landings_ar_plot
Bottom Temperature Only Model?
If landings don’t appear to matter we should remove them and see what the temperature relationship is. These are the results and marginal effects plots from that model.
Code
# Just drop the random effect part for now
temp_model_ar <- nlme::gls(
  b ~ area * season * scale(bot_temp),
  data = peak_chase_model_df,
  correlation = corAR1(form = ~ est_year | area/season))

# # confidence intervals on phi
# intervals(temp_model_ar)$corStruct

# # check the model
# check_model(temp_model_ar)

# # Collinearity without interactions
# plot(
#   check_collinearity(
#     nlme::gls(
#       b ~ area + season + scale(bot_temp),
#       data = wigley_bmspectra_model_df,
#       correlation = corAR1(form = ~ est_year | area/season))
#   ))

# Summary table
tbl_regression(temp_model_ar)
Characteristic
Beta
95% CI
1
p-value
(Intercept)
-1.2
-1.5, -0.97
<0.001
areaGulf of Maine
-0.05
-0.43, 0.34
0.8
areaGeorges Bank
-0.31
-0.76, 0.14
0.2
areaSouthern New England
0.00
-0.33, 0.33
>0.9
areaMid-Atlantic Bight
0.01
-0.27, 0.29
>0.9
seasonFall
-0.16
-0.48, 0.16
0.3
scale(bot_temp)
0.08
-0.24, 0.41
0.6
areaGulf of Maine:seasonFall
0.35
-0.11, 0.81
0.13
areaGeorges Bank:seasonFall
0.67
0.14, 1.2
0.013
areaSouthern New England:seasonFall
-0.03
-0.48, 0.41
0.9
areaMid-Atlantic Bight:seasonFall
-0.53
-1.0, -0.05
0.032
areaGulf of Maine:scale(bot_temp)
-0.11
-0.55, 0.33
0.6
areaGeorges Bank:scale(bot_temp)
-0.20
-0.67, 0.28
0.4
areaSouthern New England:scale(bot_temp)
0.03
-0.37, 0.43
0.9
areaMid-Atlantic Bight:scale(bot_temp)
0.22
-0.16, 0.61
0.3
seasonFall:scale(bot_temp)
-0.02
-0.44, 0.41
>0.9
areaGulf of Maine:seasonFall:scale(bot_temp)
0.09
-0.51, 0.69
0.8
areaGeorges Bank:seasonFall:scale(bot_temp)
0.06
-0.54, 0.65
0.9
areaSouthern New England:seasonFall:scale(bot_temp)
-0.15
-0.68, 0.38
0.6
areaMid-Atlantic Bight:seasonFall:scale(bot_temp)
-0.39
-0.89, 0.12
0.14
1
CI = Confidence Interval
Code
# Clean up the plot:
# Plot the predictions over data
mod2_preds <- as.data.frame(
  ggpredict(temp_model_ar, terms = ~ bot_temp * area * season)) %>%
  mutate(
    area = factor(group, levels = area_levels_long_all),
    season = factor(facet, levels = c("Spring", "Fall")))

#### Trend Posthoc ####
# Flag effect fits that are non-significant:
# non_zero is TRUE when the bot_temp slope CI excludes zero
mod2_emtrend <- emtrends(
  object = temp_model_ar,
  specs = ~ area * season,
  var = "bot_temp") %>%
  as_tibble() %>%
  mutate(non_zero = !(lower.CL <= 0 & upper.CL >= 0))

# Limit temperature plotting range to +/- 2C around each group's observations
actual_range <- peak_chase_model_df %>%
  group_by(season, area) %>%
  summarise(
    min_temp = min(bot_temp) - 2,
    max_temp = max(bot_temp) + 2,
    .groups = "drop")

# Plot over observed data; only groups with a significant slope keep a line
local_btemp_results <- mod2_preds %>%
  left_join(actual_range) %>%
  filter(x >= min_temp, x <= max_temp) %>%
  left_join(select(mod2_emtrend, area, season, non_zero)) %>%
  filter(non_zero) %>%
  ggplot() +
  geom_ribbon(
    aes(x, ymin = conf.low, ymax = conf.high, group = season),
    alpha = 0.1) +
  geom_line(aes(x, predicted, color = season), linewidth = 1) +
  geom_point(
    data = peak_chase_model_df,
    aes(bot_temp, b, color = season),
    alpha = 0.4, size = 1) +
  facet_grid(area ~ ., scales = "free", labeller = label_wrap_gen(width = 8)) +
  scale_color_gmri() +
  scale_x_continuous(labels = label_number(suffix = deg_c)) +
  labs(
    y = "Mass distribution Exponent",
    x = "Seasonal Bottom Temperature")

local_btemp_results
These results suggest that temperature’s relationship on the size distribution is only significant in MAB-Spring.
My gut tells me that this is not actually the driver, as under closer inspection MAB-Spring has seen a surge in Spiny dogfish.
It also does not make sense that warming spring temperatures are behind that trend, as almost all of those individuals leave the area during the warmer fall season.
There is a seasonality to the community composition here that is not fully explainable by this study.
Do we still see a Bergmann's rule pattern? It's weaker now than what we saw before.